library(GGally)
Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2
prestige %>% 
  skim()
── Data Summary ────────────────────────
                           Values    
Name                       Piped data
Number of rows             102       
Number of columns          6         
_______________________              
Column type frequency:               
  factor                   1         
  numeric                  5         
________________________             
Group variables            None      

Drop NAs and remove ‘census’ variable

Feature engineering / variable transformation

prestige_log <- prestige %>% 
  mutate(ln_women = log(0.1 + women),
         ln_income = log(income))

skim(prestige_log)
── Data Summary ────────────────────────
                           Values      
Name                       prestige_log
Number of rows             98          
Number of columns          7           
_______________________                
Column type frequency:                 
  factor                   1           
  numeric                  6           
________________________               
Group variables            None        
prestige %>% 
  select(prestige, everything()) %>% 
  ggpairs(aes(colour = type,
              alpha = 0.5))

 plot: [1,1] [===>------------------------------------------------------------------------------------------------------]  4% est: 0s 
 plot: [1,2] [=======>--------------------------------------------------------------------------------------------------]  8% est: 1s 
 plot: [1,3] [============>---------------------------------------------------------------------------------------------] 12% est: 1s 
 plot: [1,4] [================>-----------------------------------------------------------------------------------------] 16% est: 1s 
 plot: [1,5] [====================>-------------------------------------------------------------------------------------] 20% est: 1s 
 plot: [2,1] [========================>---------------------------------------------------------------------------------] 24% est: 1s 
 plot: [2,2] [=============================>----------------------------------------------------------------------------] 28% est: 1s 
 plot: [2,3] [=================================>------------------------------------------------------------------------] 32% est: 1s 
 plot: [2,4] [=====================================>--------------------------------------------------------------------] 36% est: 1s 
 plot: [2,5] [=========================================>----------------------------------------------------------------] 40% est: 1s 
 plot: [3,1] [==============================================>-----------------------------------------------------------] 44% est: 1s 
 plot: [3,2] [==================================================>-------------------------------------------------------] 48% est: 1s 
 plot: [3,3] [======================================================>---------------------------------------------------] 52% est: 1s 
 plot: [3,4] [==========================================================>-----------------------------------------------] 56% est: 1s 
 plot: [3,5] [===============================================================>------------------------------------------] 60% est: 1s 
 plot: [4,1] [===================================================================>--------------------------------------] 64% est: 1s 
 plot: [4,2] [=======================================================================>----------------------------------] 68% est: 1s 
 plot: [4,3] [===========================================================================>------------------------------] 72% est: 0s 
 plot: [4,4] [================================================================================>-------------------------] 76% est: 0s 
 plot: [4,5] [====================================================================================>---------------------] 80% est: 0s 
 plot: [5,1] [========================================================================================>-----------------] 84% est: 0s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [5,2] [============================================================================================>-------------] 88% est: 0s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [5,3] [=================================================================================================>--------] 92% est: 0s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [5,4] [=====================================================================================================>----] 96% est: 0s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [5,5] [==========================================================================================================]100% est: 0s 
                                                                                                                                      

prestige_log %>% 
  select(prestige, everything()) %>% 
  ggpairs(aes(colour = type,
              alpha = 0.5))

 plot: [1,1] [=>--------------------------------------------------------------------------------------------------------]  2% est: 0s 
 plot: [1,2] [===>------------------------------------------------------------------------------------------------------]  4% est: 3s 
 plot: [1,3] [=====>----------------------------------------------------------------------------------------------------]  6% est: 2s 
 plot: [1,4] [========>-------------------------------------------------------------------------------------------------]  8% est: 4s 
 plot: [1,5] [==========>-----------------------------------------------------------------------------------------------] 10% est: 3s 
 plot: [1,6] [============>---------------------------------------------------------------------------------------------] 12% est: 3s 
 plot: [1,7] [==============>-------------------------------------------------------------------------------------------] 14% est: 3s 
 plot: [2,1] [================>-----------------------------------------------------------------------------------------] 16% est: 3s 
 plot: [2,2] [==================>---------------------------------------------------------------------------------------] 18% est: 3s 
 plot: [2,3] [=====================>------------------------------------------------------------------------------------] 20% est: 3s 
 plot: [2,4] [=======================>----------------------------------------------------------------------------------] 22% est: 3s 
 plot: [2,5] [=========================>--------------------------------------------------------------------------------] 24% est: 3s 
 plot: [2,6] [===========================>------------------------------------------------------------------------------] 27% est: 3s 
 plot: [2,7] [=============================>----------------------------------------------------------------------------] 29% est: 2s 
 plot: [3,1] [===============================>--------------------------------------------------------------------------] 31% est: 2s 
 plot: [3,2] [==================================>-----------------------------------------------------------------------] 33% est: 2s 
 plot: [3,3] [====================================>---------------------------------------------------------------------] 35% est: 2s 
 plot: [3,4] [======================================>-------------------------------------------------------------------] 37% est: 2s 
 plot: [3,5] [========================================>-----------------------------------------------------------------] 39% est: 2s 
 plot: [3,6] [==========================================>---------------------------------------------------------------] 41% est: 2s 
 plot: [3,7] [============================================>-------------------------------------------------------------] 43% est: 2s 
 plot: [4,1] [===============================================>----------------------------------------------------------] 45% est: 2s 
 plot: [4,2] [=================================================>--------------------------------------------------------] 47% est: 2s 
 plot: [4,3] [===================================================>------------------------------------------------------] 49% est: 2s 
 plot: [4,4] [=====================================================>----------------------------------------------------] 51% est: 2s 
 plot: [4,5] [=======================================================>--------------------------------------------------] 53% est: 2s 
 plot: [4,6] [=========================================================>------------------------------------------------] 55% est: 1s 
 plot: [4,7] [============================================================>---------------------------------------------] 57% est: 1s 
 plot: [5,1] [==============================================================>-------------------------------------------] 59% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [5,2] [================================================================>-----------------------------------------] 61% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [5,3] [==================================================================>---------------------------------------] 63% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [5,4] [====================================================================>-------------------------------------] 65% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [5,5] [======================================================================>-----------------------------------] 67% est: 1s 
 plot: [5,6] [=========================================================================>--------------------------------] 69% est: 1s 
 plot: [5,7] [===========================================================================>------------------------------] 71% est: 1s 
 plot: [6,1] [=============================================================================>----------------------------] 73% est: 1s 
 plot: [6,2] [===============================================================================>--------------------------] 76% est: 1s 
 plot: [6,3] [=================================================================================>------------------------] 78% est: 1s 
 plot: [6,4] [===================================================================================>----------------------] 80% est: 1s 
 plot: [6,5] [======================================================================================>-------------------] 82% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [6,6] [========================================================================================>-----------------] 84% est: 1s 
 plot: [6,7] [==========================================================================================>---------------] 86% est: 1s 
 plot: [7,1] [============================================================================================>-------------] 88% est: 0s 
 plot: [7,2] [==============================================================================================>-----------] 90% est: 0s 
 plot: [7,3] [================================================================================================>---------] 92% est: 0s 
 plot: [7,4] [===================================================================================================>------] 94% est: 0s 
 plot: [7,5] [=====================================================================================================>----] 96% est: 0s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [7,6] [=======================================================================================================>--] 98% est: 0s 
 plot: [7,7] [==========================================================================================================]100% est: 0s 
                                                                                                                                      

Investigate the large outliers for income:

prestige %>% 
  slice_max(income, n = 10)

Start building our model:

Best predictor first

mod1a <- lm(prestige ~ education,
            data = prestige)

summary(mod1a)

Call:
lm(formula = prestige ~ education, data = prestige)

Residuals:
    Min      1Q  Median      3Q     Max 
-21.605  -6.151   0.366   6.565  17.540 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -10.8409     3.5285  -3.072  0.00276 ** 
education     5.3884     0.3168  17.006  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 8.578 on 96 degrees of freedom
Multiple R-squared:  0.7508,    Adjusted R-squared:  0.7482 
F-statistic: 289.2 on 1 and 96 DF,  p-value: < 2.2e-16

For every unit increase in education, prestige increases 5.3 if all other variables are held constant. Education can explain 75% of variation in prestige

autoplot(mod1a)

All plots look good - we could accept these

mod1b <- lm(prestige ~ type,
            data = prestige)

summary(mod1b)

Call:
lm(formula = prestige ~ type, data = prestige)

Residuals:
     Min       1Q   Median       3Q      Max 
-18.2273  -7.1773  -0.0854   6.1174  25.2565 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   35.527      1.432  24.810  < 2e-16 ***
typeprof      32.321      2.227  14.511  < 2e-16 ***
typewc         6.716      2.444   2.748  0.00718 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 9.499 on 95 degrees of freedom
Multiple R-squared:  0.6976,    Adjusted R-squared:  0.6913 
F-statistic: 109.6 on 2 and 95 DF,  p-value: < 2.2e-16
autoplot(mod1b)

Although both models look good, mod1a is better (based on r^2 and RSE), so we would choose ‘education’ as our first predictor

now we want to see which variables describe the ‘residue’ - the unexplained model error

change the variable of interest

prestige_resid <- prestige_log %>% 
  add_residuals(mod1a) %>% 
  select(-c(prestige, education))

prestige_resid %>% 
  select(resid, everything()) %>% 
  ggpairs(aes(colour = type,
              alpha = 0.5))

 plot: [1,1] [==>------------------------------------------------------------------------------------------------------]  3% est: 0s 
 plot: [1,2] [=====>---------------------------------------------------------------------------------------------------]  6% est: 2s 
 plot: [1,3] [========>------------------------------------------------------------------------------------------------]  8% est: 2s 
 plot: [1,4] [===========>---------------------------------------------------------------------------------------------] 11% est: 2s 
 plot: [1,5] [==============>------------------------------------------------------------------------------------------] 14% est: 2s 
 plot: [1,6] [=================>---------------------------------------------------------------------------------------] 17% est: 2s 
 plot: [2,1] [===================>-------------------------------------------------------------------------------------] 19% est: 2s 
 plot: [2,2] [======================>----------------------------------------------------------------------------------] 22% est: 2s 
 plot: [2,3] [=========================>-------------------------------------------------------------------------------] 25% est: 2s 
 plot: [2,4] [============================>----------------------------------------------------------------------------] 28% est: 2s 
 plot: [2,5] [===============================>-------------------------------------------------------------------------] 31% est: 2s 
 plot: [2,6] [==================================>----------------------------------------------------------------------] 33% est: 2s 
 plot: [3,1] [=====================================>-------------------------------------------------------------------] 36% est: 2s 
 plot: [3,2] [========================================>----------------------------------------------------------------] 39% est: 2s 
 plot: [3,3] [===========================================>-------------------------------------------------------------] 42% est: 2s 
 plot: [3,4] [==============================================>----------------------------------------------------------] 44% est: 1s 
 plot: [3,5] [=================================================>-------------------------------------------------------] 47% est: 1s 
 plot: [3,6] [===================================================>-----------------------------------------------------] 50% est: 1s 
 plot: [4,1] [======================================================>--------------------------------------------------] 53% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [4,2] [=========================================================>-----------------------------------------------] 56% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [4,3] [============================================================>--------------------------------------------] 58% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [4,4] [===============================================================>-----------------------------------------] 61% est: 1s 
 plot: [4,5] [==================================================================>--------------------------------------] 64% est: 1s 
 plot: [4,6] [=====================================================================>-----------------------------------] 67% est: 1s 
 plot: [5,1] [========================================================================>--------------------------------] 69% est: 1s 
 plot: [5,2] [===========================================================================>-----------------------------] 72% est: 1s 
 plot: [5,3] [==============================================================================>--------------------------] 75% est: 1s 
 plot: [5,4] [=================================================================================>-----------------------] 78% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [5,5] [====================================================================================>--------------------] 81% est: 1s 
 plot: [5,6] [=======================================================================================>-----------------] 83% est: 1s 
 plot: [6,1] [=========================================================================================>---------------] 86% est: 0s 
 plot: [6,2] [============================================================================================>------------] 89% est: 0s 
 plot: [6,3] [===============================================================================================>---------] 92% est: 0s 
 plot: [6,4] [==================================================================================================>------] 94% est: 0s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [6,5] [=====================================================================================================>---] 97% est: 0s 
 plot: [6,6] [=========================================================================================================]100% est: 0s 
                                                                                                                                     

things to model next based on this:

add second predictor - the one that explains the most residual error

mod2a<- lm(prestige ~ education + ln_income,
           data = prestige_log)

summary(mod2a)

Call:
lm(formula = prestige ~ education + ln_income, data = prestige_log)

Residuals:
     Min       1Q   Median       3Q      Max 
-16.6775  -4.0506  -0.2538   4.0648  16.7992 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -101.1882    12.8490  -7.875 5.51e-12 ***
education      4.0375     0.3173  12.726  < 2e-16 ***
ln_income     12.0564     1.6719   7.211 1.33e-10 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.932 on 95 degrees of freedom
Multiple R-squared:  0.8389,    Adjusted R-squared:  0.8356 
F-statistic: 247.4 on 2 and 95 DF,  p-value: < 2.2e-16
autoplot(mod2a)

mod2b<- lm(prestige ~ education + income,
           data = prestige_log)

summary(mod2b)

Call:
lm(formula = prestige ~ education + income, data = prestige_log)

Residuals:
     Min       1Q   Median       3Q      Max 
-16.9367  -4.8881   0.0116   4.9690  15.9280 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -7.6210352  3.1162309  -2.446   0.0163 *  
education    4.2921076  0.3360645  12.772  < 2e-16 ***
income       0.0012415  0.0002185   5.682 1.45e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 7.45 on 95 degrees of freedom
Multiple R-squared:  0.814, Adjusted R-squared:  0.8101 
F-statistic: 207.9 on 2 and 95 DF,  p-value: < 2.2e-16
autoplot(mod2b)

We can reject this model in favour of mod2a

mod2c<- lm(prestige ~ education + type,
           data = prestige_log)

summary(mod2c)

Call:
lm(formula = prestige ~ education + type, data = prestige_log)

Residuals:
    Min      1Q  Median      3Q     Max 
-19.410  -5.508   1.360   5.694  17.171 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -2.6982     5.7361  -0.470   0.6392    
education     4.5728     0.6716   6.809 9.16e-10 ***
typeprof      6.1424     4.2590   1.442   0.1526    
typewc       -5.4585     2.6907  -2.029   0.0453 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 7.814 on 94 degrees of freedom
Multiple R-squared:  0.7975,    Adjusted R-squared:  0.791 
F-statistic: 123.4 on 3 and 94 DF,  p-value: < 2.2e-16
autoplot(mod2c)

when ‘type’ categories have different levels of significance, either use all or none of them - DON”T CHERRY PICK

We can reject this model in favour of mod2a

Results:

check for significance of categorical: ANOVA test

anova(mod1a, mod2c)
Analysis of Variance Table

Model 1: prestige ~ education
Model 2: prestige ~ education + type
  Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
1     96 7064.4                                  
2     94 5740.0  2    1324.4 10.844 5.787e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

the means of the categorical levels are statistically different, therefore all 3 levels could be included

third predictor

prestige_resid <- prestige_log %>% 
  add_residuals(mod2a) %>% 
  select(-c(prestige, education, ln_income))

prestige_resid %>% 
  select(resid, everything()) %>%
  ggpairs(aes(colour = type,
              alpha = 0.5))

 plot: [1,1] [===>-----------------------------------------------------------------------------------------------------]  4% est: 0s 
 plot: [1,2] [=======>-------------------------------------------------------------------------------------------------]  8% est: 1s 
 plot: [1,3] [============>--------------------------------------------------------------------------------------------] 12% est: 1s 
 plot: [1,4] [================>----------------------------------------------------------------------------------------] 16% est: 1s 
 plot: [1,5] [====================>------------------------------------------------------------------------------------] 20% est: 1s 
 plot: [2,1] [========================>--------------------------------------------------------------------------------] 24% est: 1s 
 plot: [2,2] [============================>----------------------------------------------------------------------------] 28% est: 1s 
 plot: [2,3] [=================================>-----------------------------------------------------------------------] 32% est: 1s 
 plot: [2,4] [=====================================>-------------------------------------------------------------------] 36% est: 1s 
 plot: [2,5] [=========================================>---------------------------------------------------------------] 40% est: 1s 
 plot: [3,1] [=============================================>-----------------------------------------------------------] 44% est: 1s 
 plot: [3,2] [=================================================>-------------------------------------------------------] 48% est: 1s 
 plot: [3,3] [======================================================>--------------------------------------------------] 52% est: 1s 
 plot: [3,4] [==========================================================>----------------------------------------------] 56% est: 1s 
 plot: [3,5] [==============================================================>------------------------------------------] 60% est: 1s 
 plot: [4,1] [==================================================================>--------------------------------------] 64% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [4,2] [======================================================================>----------------------------------] 68% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [4,3] [===========================================================================>-----------------------------] 72% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [4,4] [===============================================================================>-------------------------] 76% est: 0s 
 plot: [4,5] [===================================================================================>---------------------] 80% est: 0s 
 plot: [5,1] [=======================================================================================>-----------------] 84% est: 0s 
 plot: [5,2] [===========================================================================================>-------------] 88% est: 0s 
 plot: [5,3] [================================================================================================>--------] 92% est: 0s 
 plot: [5,4] [====================================================================================================>----] 96% est: 0s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [5,5] [=========================================================================================================]100% est: 0s 
                                                                                                                                     

things to model next based on this:

mod3a <- lm(prestige ~ education + ln_income + type,
           data = prestige_log)

summary(mod3a)

Call:
lm(formula = prestige ~ education + ln_income + type, data = prestige_log)

Residuals:
    Min      1Q  Median      3Q     Max 
-13.511  -3.746   1.011   4.356  18.438 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -81.2019    13.7431  -5.909 5.63e-08 ***
education     3.2845     0.6081   5.401 5.06e-07 ***
ln_income    10.4875     1.7167   6.109 2.31e-08 ***
typeprof      6.7509     3.6185   1.866   0.0652 .  
typewc       -1.4394     2.3780  -0.605   0.5465    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.637 on 93 degrees of freedom
Multiple R-squared:  0.8555,    Adjusted R-squared:  0.8493 
F-statistic: 137.6 on 4 and 93 DF,  p-value: < 2.2e-16
autoplot(mod3a)

do an anova test on types

anova(mod2a, mod3a)
Analysis of Variance Table

Model 1: prestige ~ education + ln_income
Model 2: prestige ~ education + ln_income + type
  Res.Df    RSS Df Sum of Sq      F   Pr(>F)   
1     95 4565.4                                
2     93 4096.3  2    469.07 5.3247 0.006465 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

the results are significant - we can include

add type to model

mod3b <- lm(prestige ~ education + ln_income + women,
           data = prestige_log)

summary(mod3b)

Call:
lm(formula = prestige ~ education + ln_income + women, data = prestige_log)

Residuals:
     Min       1Q   Median       3Q      Max 
-16.8202  -4.7019   0.0696   4.2245  17.6833 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -129.16790   18.95716  -6.814 8.97e-10 ***
education      3.59404    0.38431   9.352 4.39e-15 ***
ln_income     15.60547    2.43246   6.416 5.62e-09 ***
women          0.06481    0.03270   1.982   0.0504 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.828 on 94 degrees of freedom
Multiple R-squared:  0.8454,    Adjusted R-squared:  0.8405 
F-statistic: 171.4 on 3 and 94 DF,  p-value: < 2.2e-16
autoplot(mod3b)

Interactions (between main effects)

prestige_resid <- prestige_log %>% 
  add_residuals(mod3a) %>% 
  select(-prestige)
coplot(resid ~ ln_income | education,
       panel = function(x, y, ...) {
         points(x, y)
         abline(lm(y ~ x), col = "blue")
       },
       data = prestige_resid,
       rows = 1)

prestige_resid %>% 
  ggplot(aes(x = education,
             y = resid,
             colour = type)) +
  geom_point()+
  geom_smooth(method = "lm", se = FALSE)
`geom_smooth()` using formula 'y ~ x'

prestige_resid %>% 
  ggplot(aes(x = ln_income,
             y = resid,
             colour = type)) +
  geom_point()+
  geom_smooth(method = "lm", se = FALSE)
`geom_smooth()` using formula 'y ~ x'

Including the interactions

mod4a <- lm(prestige ~ education + ln_income + type + education:ln_income,
            data = prestige_log)

summary(mod4a)

Call:
lm(formula = prestige ~ education + ln_income + type + education:ln_income, 
    data = prestige_log)

Residuals:
     Min       1Q   Median       3Q      Max 
-13.6129  -4.1461   0.7695   4.1546  17.9453 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         -166.7630    51.0506  -3.267 0.001530 ** 
education             11.1444     4.5601   2.444 0.016438 *  
ln_income             20.2741     5.8790   3.449 0.000852 ***
typeprof               6.8626     3.5803   1.917 0.058374 .  
typewc                -2.3516     2.4103  -0.976 0.331796    
education:ln_income   -0.8892     0.5114  -1.739 0.085414 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.566 on 92 degrees of freedom
Multiple R-squared:  0.8601,    Adjusted R-squared:  0.8525 
F-statistic: 113.1 on 5 and 92 DF,  p-value: < 2.2e-16
mod4b <- lm(prestige ~ education + ln_income + type + education:type,
            data = prestige_log)

summary(mod4b)

Call:
lm(formula = prestige ~ education + ln_income + type + education:type, 
    data = prestige_log)

Residuals:
    Min      1Q  Median      3Q     Max 
-13.962  -3.797   1.041   4.092  16.503 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)        -81.6672    14.3681  -5.684 1.57e-07 ***
education            3.1407     0.9004   3.488 0.000751 ***
ln_income           10.6833     1.7108   6.245 1.33e-08 ***
typeprof            15.6176    14.2168   1.099 0.274871    
typewc             -30.4466    18.3465  -1.660 0.100451    
education:typeprof  -0.5801     1.2211  -0.475 0.635887    
education:typewc     2.6675     1.7551   1.520 0.132018    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.585 on 91 degrees of freedom
Multiple R-squared:  0.8608,    Adjusted R-squared:  0.8516 
F-statistic: 93.79 on 6 and 91 DF,  p-value: < 2.2e-16
mod4c <- lm(prestige ~ education + ln_income + type + type:ln_income,
            data = prestige_log)

summary(mod4c)

Call:
lm(formula = prestige ~ education + ln_income + type + type:ln_income, 
    data = prestige_log)

Residuals:
    Min      1Q  Median      3Q     Max 
-13.484  -4.453   1.122   4.123  18.737 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)        -118.4325    20.3728  -5.813 8.97e-08 ***
education             3.2107     0.5993   5.357 6.31e-07 ***
ln_income            14.9336     2.4928   5.991 4.12e-08 ***
typeprof             82.7757    31.5059   2.627   0.0101 *  
typewc               51.3717    36.8521   1.394   0.1667    
ln_income:typeprof   -8.5690     3.5251  -2.431   0.0170 *  
ln_income:typewc     -6.1925     4.3172  -1.434   0.1549    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.491 on 91 degrees of freedom
Multiple R-squared:  0.8647,    Adjusted R-squared:  0.8558 
F-statistic: 96.96 on 6 and 91 DF,  p-value: < 2.2e-16
autoplot(mod4c)

anova(mod3a, mod4c)
Analysis of Variance Table

Model 1: prestige ~ education + ln_income + type
Model 2: prestige ~ education + ln_income + type + type:ln_income
  Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
1     93 4096.3                              
2     91 3834.2  2    262.13 3.1107 0.04934 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Relative Importance

calc.relimp(mod3a, type = "lmg", rela = TRUE)
Response variable: prestige 
Total response variance: 292.2358 
Analysis based on 98 observations 

4 Regressors: 
Some regressors combined in groups: 
        Group  type : typeprof typewc 

 Relative importance of 3 (groups of) regressors assessed: 
 type education ln_income 
 
Proportion of variance explained by model: 85.55%
Metrics are normalized to sum to 100% (rela=TRUE). 

Relative importance metrics: 

                lmg
type      0.3352553
education 0.3831467
ln_income 0.2815980

Average coefficients for different model sizes: 

             1group   2groups   3groups
education  5.388408  4.305159  3.284486
ln_income 24.619472 12.879804 10.487471
typeprof  32.321114 14.810837  6.750887
typewc     6.716206  1.013706 -1.439403
calc.relimp(mod4c, type = "lmg", rela = TRUE)
Response variable: prestige 
Total response variance: 292.2358 
Analysis based on 98 observations 

6 Regressors: 
Some regressors combined in groups: 
        Group  type : typeprof typewc 
        Group  ln_income:type : ln_income:typeprof ln_income:typewc 

 Relative importance of 4 (groups of) regressors assessed: 
 type ln_income:type education ln_income 
 
Proportion of variance explained by model: 86.47%
Metrics are normalized to sum to 100% (rela=TRUE). 

Relative importance metrics: 

                      lmg
type           0.38512449
ln_income:type 0.01146495
education      0.29662098
ln_income      0.30678958

Average coefficients for different model sizes: 

                      1group   2groups   3groups   4groups
education           5.388408  4.305159  3.284486  3.210707
ln_income          24.619472 12.879804 14.634783 14.933637
typeprof           32.321114 14.810837 54.690682 82.775730
typewc              6.716206  1.013706 41.131305 51.371716
ln_income:typeprof       NaN       NaN -9.001091 -8.568986
ln_income:typewc         NaN       NaN -8.979324 -6.192503
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShjYXIpCmxpYnJhcnkobW9kZWxyKQpsaWJyYXJ5KHNraW1yKQpsaWJyYXJ5KEdHYWxseSkKbGlicmFyeShnZ2ZvcnRpZnkpCmBgYAoKCmBgYHtyfQpwcmVzdGlnZSA8LSBQcmVzdGlnZQpgYGAKCgpgYGB7cn0KcHJlc3RpZ2UgJT4lIAogIHNraW0oKQpgYGAKCkRyb3AgTkFzIGFuZCByZW1vdmUgJ2NlbnN1cycgdmFyaWFibGUKCmBgYHtyfQpwcmVzdGlnZSA8LSBQcmVzdGlnZSAlPiUgCiAgZHJvcF9uYSgpICU+JSAKICBzZWxlY3QoLWNlbnN1cykKYGBgCgoKRmVhdHVyZSBlbmdpbmVlcmluZyAvIHZhcmlhYmxlIHRyYW5zZm9ybWF0aW9uCgoqIExvZ2dlZCBzb21lIHBvc2l0aXZlbHkgc2tld2VkIG51bWVyaWNzCiAgKyB3b21lbgogICsgaW5jb21lCgpgYGB7cn0KcHJlc3RpZ2VfbG9nIDwtIHByZXN0aWdlICU+JSAKICBtdXRhdGUobG5fd29tZW4gPSBsb2coMC4xICsgd29tZW4pLAogICAgICAgICBsbl9pbmNvbWUgPSBsb2coaW5jb21lKSkKCnNraW0ocHJlc3RpZ2VfbG9nKQpgYGAKCgpgYGB7cn0KcHJlc3RpZ2UgJT4lIAogIHNlbGVjdChwcmVzdGlnZSwgZXZlcnl0aGluZygpKSAlPiUgCiAgZ2dwYWlycyhhZXMoY29sb3VyID0gdHlwZSwKICAgICAgICAgICAgICBhbHBoYSA9IDAuNSkpCmBgYAoKCmBgYHtyfQpwcmVzdGlnZV9sb2cgJT4lIAogIHNlbGVjdChwcmVzdGlnZSwgZXZlcnl0aGluZygpKSAlPiUgCiAgZ2dwYWlycyhhZXMoY29sb3VyID0gdHlwZSwKICAgICAgICAgICAgICBhbHBoYSA9IDAuNSkpCmBgYAoKKiBCeSBsb29raW5nIGF0IHRoZSBsb2dzIHdlIGFyZSBhY2NvdW50aW5nIGZvciB0aGUgbGFyZ2Ugb3V0bGllcnMKKiBUaGlzIG1ha2VzIGl0IGxvb2sgbW9yZSBsaWtlIGEgbm9ybWFsIGRpc3RyaWJ1dGlvbgoqIENvcnJlbGF0aW9uIGFsc28gaW5jcmVhc2VzIGJ5IDAuMDUKKiBUaGUgc2NhdHRlciBwbG90IG9mIGxuX2luY29tZSBpcyBtdWNoIG1vcmUgJ25vcm1hbCcgdGhhbiBqdXN0IGluY29tZQoKCkludmVzdGlnYXRlIHRoZSBsYXJnZSBvdXRsaWVycyBmb3IgaW5jb21lOgoKYGBge3J9CnByZXN0aWdlICU+JSAKICBzbGljZV9tYXgoaW5jb21lLCBuID0gMTApCmBgYAoKClN0YXJ0IGJ1aWxkaW5nIG91ciBtb2RlbDoKCkJlc3QgcHJlZGljdG9yIGZpcnN0CgpgYGB7cn0KbW9kMWEgPC0gbG0ocHJlc3RpZ2UgfiBlZHVjYXRpb24sCiAgICAgICAgICAgIGRhdGEgPSBwcmVzdGlnZSkKCnN1bW1hcnkobW9kMWEpCmBgYAoKRm9yIGV2ZXJ5IHVuaXQgaW5jcmVhc2UgaW4gZWR1Y2F0aW9uLCBwcmVzdGlnZSBpbmNyZWFzZXMgNS4zIGlmIGFsbCBvdGhlciB2YXJpYWJsZXMgYXJlIGhlbGQgY29uc3RhbnQuCkVkdWNhdGlvbiBjYW4gZXhwbGFpbiA3NSUgb2YgdmFyaWF0aW9uIGluIHByZXN0aWdlCgpgYGB7cn0KYXV0b3Bsb3QobW9kMWEpCmBgYAoKQWxsIHBsb3RzIGxvb2sgZ29vZCAtIHdlIGNvdWxkIGFjY2VwdCB0aGVzZQoKCmBgYHtyfQptb2QxYiA8LSBsbShwcmVzdGlnZSB+IHR5cGUsCiAgICAgICAgICAgIGRhdGEgPSBwcmVzdGlnZSkKCnN1bW1hcnkobW9kMWIpCmBgYAoKKiBCbHVlLWNvbGxhcjogMzUuNQoqIFdoaXRlLWNvbGxhcjogNDIuMgoqIFByb2Zlc3Npb25hbDogNzQuNQoqIFJTRSBzbGlnaHRseSBoaWdoZXIgKDkuNSkKKiAkcl4yJCBzbGlnaHRseSBsb3dlciAoMC43KSAKCgpgYGB7cn0KYXV0b3Bsb3QobW9kMWIpCmBgYAoKQWx0aG91Z2ggYm90aCBtb2RlbHMgbG9vayBnb29kLCBtb2QxYSBpcyBiZXR0ZXIgKGJhc2VkIG9uIHJeMiBhbmQgUlNFKSwgc28gd2Ugd291bGQgY2hvb3NlICdlZHVjYXRpb24nIGFzIG91ciBmaXJzdCBwcmVkaWN0b3IKCgpub3cgd2Ugd2FudCB0byBzZWUgd2hpY2ggdmFyaWFibGVzIGRlc2NyaWJlIHRoZSAncmVzaWR1ZScgLSB0aGUgdW5leHBsYWluZWQgbW9kZWwgZXJyb3IKCmNoYW5nZSB0aGUgdmFyaWFibGUgb2YgaW50ZXJlc3QKCmBgYHtyfQpwcmVzdGlnZV9yZXNpZCA8LSBwcmVzdGlnZV9sb2cgJT4lIAogIGFkZF9yZXNpZHVhbHMobW9kMWEpICU+JSAKICBzZWxlY3QoLWMocHJlc3RpZ2UsIGVkdWNhdGlvbikpCgpwcmVzdGlnZV9yZXNpZCAlPiUgCiAgc2VsZWN0KHJlc2lkLCBldmVyeXRoaW5nKCkpICU+JSAKICBnZ3BhaXJzKGFlcyhjb2xvdXIgPSB0eXBlLAogICAgICAgICAgICAgIGFscGhhID0gMC41KSkKYGBgCgp0aGluZ3MgdG8gbW9kZWwgbmV4dCBiYXNlZCBvbiB0aGlzOgoKKiBsbl9pbmNvbWUKKiBpbmNvbWUKKiB0eXBlCgphZGQgc2Vjb25kIHByZWRpY3RvciAtIHRoZSBvbmUgdGhhdCBleHBsYWlucyB0aGUgbW9zdCByZXNpZHVhbCBlcnJvcgoKYGBge3J9Cm1vZDJhPC0gbG0ocHJlc3RpZ2UgfiBlZHVjYXRpb24gKyBsbl9pbmNvbWUsCiAgICAgICAgICAgZGF0YSA9IHByZXN0aWdlX2xvZykKCnN1bW1hcnkobW9kMmEpCgphdXRvcGxvdChtb2QyYSkKYGBgCgoqICRyXjIkIGlzIHVwIHRvIDgzLjQKKiBSU0UgaXMgZG93biB0byA2LjkKKiBlZHVjYXRpb24gY29lZmZpY2llbnQgaXMgcmVkdWNlZCB0byA0IChmcm9tIDUuMykKKiBjYW4ndCByZWFsbHkgY29tcGFyZSB0aGVzZSBjb2VmZmljaWVudHMgYXMgZWR1Y2F0aW9uIGlzIG1lYXN1cmVkIGluIHllYXJzIGFuZCBsbl9pbmNvbWUgaXMgYSBsb2cgb2YgZG9sbGFycyAKCgpgYGB7cn0KbW9kMmI8LSBsbShwcmVzdGlnZSB+IGVkdWNhdGlvbiArIGluY29tZSwKICAgICAgICAgICBkYXRhID0gcHJlc3RpZ2VfbG9nKQoKc3VtbWFyeShtb2QyYikKCmF1dG9wbG90KG1vZDJiKQpgYGAKCiogbGVzcyBwcmVkaWN0aXZlIG9mIGEgbW9kZWwKKiBlZHVjYXRpb24gY29lZmZpY2VudCBsZXNzIGVmZmVjdGVkCiogJHJeMiQgbGVzcwoqIFJFUyBoaWdoZXIKCldlIGNhbiAqKnJlamVjdCoqIHRoaXMgbW9kZWwgaW4gZmF2b3VyIG9mICoqbW9kMmEqKgoKCmBgYHtyfQptb2QyYzwtIGxtKHByZXN0aWdlIH4gZWR1Y2F0aW9uICsgdHlwZSwKICAgICAgICAgICBkYXRhID0gcHJlc3RpZ2VfbG9nKQoKc3VtbWFyeShtb2QyYykKCmF1dG9wbG90KG1vZDJjKQpgYGAKCiogdHlwZSBsYWJlbHMgZXhwbGFpbiAqKmxlc3MqKiB3aGVuIGVkdWNhdGlvbiBpcyBjb250cm9sbGVkCiogJHJeMiQgaXMgZG93bgoqIFJTRSBpcyBoaWdoZXIKCndoZW4gJ3R5cGUnIGNhdGVnb3JpZXMgaGF2ZSBkaWZmZXJlbnQgbGV2ZWxzIG9mIHNpZ25pZmljYW5jZSwgZWl0aGVyIHVzZSAqKmFsbCoqIG9yICoqbm9uZSoqIG9mIHRoZW0gLSAqKkRPTiJUIENIRVJSWSBQSUNLKioKCldlIGNhbiAqKnJlamVjdCoqIHRoaXMgbW9kZWwgaW4gZmF2b3VyIG9mICoqbW9kMmEqKgoKUmVzdWx0czoKCiogKiptb2QyYSoqIGdhdmUgYmlnZ2VzdCB1cGxpZnQgaW4gJHJeMiQKKiByZXNpZHVhbHMgbG9vayBncmVhdAoKCmNoZWNrIGZvciBzaWduaWZpY2FuY2Ugb2YgY2F0ZWdvcmljYWw6IEFOT1ZBIHRlc3QKYGBge3J9CmFub3ZhKG1vZDFhLCBtb2QyYykKYGBgCgp0aGUgbWVhbnMgb2YgdGhlIGNhdGVnb3JpY2FsIGxldmVscyBhcmUgc3RhdGlzdGljYWxseSBkaWZmZXJlbnQsIHRoZXJlZm9yZSAqKmFsbCAzIGxldmVscyoqIGNvdWxkIGJlIGluY2x1ZGVkCgoKdGhpcmQgcHJlZGljdG9yCgpgYGB7cn0KcHJlc3RpZ2VfcmVzaWQgPC0gcHJlc3RpZ2VfbG9nICU+JSAKICBhZGRfcmVzaWR1YWxzKG1vZDJhKSAlPiUgCiAgc2VsZWN0KC1jKHByZXN0aWdlLCBlZHVjYXRpb24sIGxuX2luY29tZSkpCgpwcmVzdGlnZV9yZXNpZCAlPiUgCiAgc2VsZWN0KHJlc2lkLCBldmVyeXRoaW5nKCkpICU+JQogIGdncGFpcnMoYWVzKGNvbG91ciA9IHR5cGUsCiAgICAgICAgICAgICAgYWxwaGEgPSAwLjUpKQpgYGAKCnRoaW5ncyB0byBtb2RlbCBuZXh0IGJhc2VkIG9uIHRoaXM6CgoqIHR5cGUKKiB3b21lbgoKYGBge3J9Cm1vZDNhIDwtIGxtKHByZXN0aWdlIH4gZWR1Y2F0aW9uICsgbG5faW5jb21lICsgdHlwZSwKICAgICAgICAgICBkYXRhID0gcHJlc3RpZ2VfbG9nKQoKc3VtbWFyeShtb2QzYSkKCmF1dG9wbG90KG1vZDNhKQpgYGAKCmRvIGFuIGFub3ZhIHRlc3Qgb24gdHlwZXMKCmBgYHtyfQphbm92YShtb2QyYSwgbW9kM2EpCmBgYAoKdGhlIHJlc3VsdHMgYXJlIHNpZ25pZmljYW50IC0gd2UgY2FuIGluY2x1ZGUKCioqYWRkIHR5cGUgdG8gbW9kZWwqKgoKYGBge3J9Cm1vZDNiIDwtIGxtKHByZXN0aWdlIH4gZWR1Y2F0aW9uICsgbG5faW5jb21lICsgd29tZW4sCiAgICAgICAgICAgZGF0YSA9IHByZXN0aWdlX2xvZykKCnN1bW1hcnkobW9kM2IpCgphdXRvcGxvdChtb2QzYikKCmBgYAoKCkludGVyYWN0aW9ucyAoYmV0d2VlbiBtYWluIGVmZmVjdHMpCgpgYGB7cn0KcHJlc3RpZ2VfcmVzaWQgPC0gcHJlc3RpZ2VfbG9nICU+JSAKICBhZGRfcmVzaWR1YWxzKG1vZDNhKSAlPiUgCiAgc2VsZWN0KC1wcmVzdGlnZSkKYGBgCgpgYGB7cn0KY29wbG90KHJlc2lkIH4gbG5faW5jb21lIHwgZWR1Y2F0aW9uLAogICAgICAgcGFuZWwgPSBmdW5jdGlvbih4LCB5LCAuLi4pIHsKICAgICAgICAgcG9pbnRzKHgsIHkpCiAgICAgICAgIGFibGluZShsbSh5IH4geCksIGNvbCA9ICJibHVlIikKICAgICAgIH0sCiAgICAgICBkYXRhID0gcHJlc3RpZ2VfcmVzaWQsCiAgICAgICByb3dzID0gMSkKYGBgCgoKKiBvdmVyIHByZWRpY3RpbmcgYXQgaGlnaGVyIGluY29tZQoqIHVuZGVyIHByZWRpY3RpbmcgYXQgbG93ZXIgaW5jb21lCgoKYGBge3J9CnByZXN0aWdlX3Jlc2lkICU+JSAKICBnZ3Bsb3QoYWVzKHggPSBlZHVjYXRpb24sCiAgICAgICAgICAgICB5ID0gcmVzaWQsCiAgICAgICAgICAgICBjb2xvdXIgPSB0eXBlKSkgKwogIGdlb21fcG9pbnQoKSsKICBnZW9tX3Ntb290aChtZXRob2QgPSAibG0iLCBzZSA9IEZBTFNFKQpgYGAKCiogdGhlcmUgaXMgYSBkaWZmZXJlbmNlIGluIHR5cGUgbGV2ZWxzCiogbW9kZWwgY291bGQgYmVuZWZpdCBmcm9tIGluY2x1ZGluZyB0aGlzIAoKCmBgYHtyfQpwcmVzdGlnZV9yZXNpZCAlPiUgCiAgZ2dwbG90KGFlcyh4ID0gbG5faW5jb21lLAogICAgICAgICAgICAgeSA9IHJlc2lkLAogICAgICAgICAgICAgY29sb3VyID0gdHlwZSkpICsKICBnZW9tX3BvaW50KCkrCiAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIiwgc2UgPSBGQUxTRSkKYGBgCgoqIHRoZXJlIGlzIGEgZGlmZmVyZW5jZSBpbiB0eXBlIGxldmVscwoqIG1vZGVsIGNvdWxkIGJlbmVmaXQgZnJvbSBpbmNsdWRpbmcgdGhpcyAKCkluY2x1ZGluZyB0aGUgaW50ZXJhY3Rpb25zCgpgYGB7cn0KbW9kNGEgPC0gbG0ocHJlc3RpZ2UgfiBlZHVjYXRpb24gKyBsbl9pbmNvbWUgKyB0eXBlICsgZWR1Y2F0aW9uOmxuX2luY29tZSwKICAgICAgICAgICAgZGF0YSA9IHByZXN0aWdlX2xvZykKCnN1bW1hcnkobW9kNGEpCmBgYAoKKiBlZHVjYXRpb246bG5faW5jb21lIGlzIG9ubHkgYSAnLicgc2lnbmlmaWNhbmNlCiAgKyBtYXliZSBkb24ndCBpbmNsdWRlCgoKYGBge3J9Cm1vZDRiIDwtIGxtKHByZXN0aWdlIH4gZWR1Y2F0aW9uICsgbG5faW5jb21lICsgdHlwZSArIGVkdWNhdGlvbjp0eXBlLAogICAgICAgICAgICBkYXRhID0gcHJlc3RpZ2VfbG9nKQoKc3VtbWFyeShtb2Q0YikKYGBgCgoKCmBgYHtyfQptb2Q0YyA8LSBsbShwcmVzdGlnZSB+IGVkdWNhdGlvbiArIGxuX2luY29tZSArIHR5cGUgKyB0eXBlOmxuX2luY29tZSwKICAgICAgICAgICAgZGF0YSA9IHByZXN0aWdlX2xvZykKCnN1bW1hcnkobW9kNGMpCmBgYAoKCmBgYHtyfQphdXRvcGxvdChtb2Q0YykKYGBgCgoKYGBge3J9CmFub3ZhKG1vZDNhLCBtb2Q0YykKYGBgCgoKUmVsYXRpdmUgSW1wb3J0YW5jZQoKYGBge3J9CmxpYnJhcnkocmVsYWltcG8pCgpjYWxjLnJlbGltcChtb2QzYSwgdHlwZSA9ICJsbWciLCByZWxhID0gVFJVRSkKYGBgCgoKYGBge3J9CmNhbGMucmVsaW1wKG1vZDRjLCB0eXBlID0gImxtZyIsIHJlbGEgPSBUUlVFKQpgYGAKCgoKCg==